/**
 *
 * \file        charconv.c
 *
 * \brief       Routines for dealing with converting between different
 *              character sets, such as UTF-8, ASCII, UNICODE, etc.
 *
 * \author      Pete McCormick
 *
 * \date        02/22/2006
 *
 * \note        UNICODE support is currently crippled
 *
 */

////////////////////////////////////////////////////////////////////////////////

#include "charconv.h"
//#include "fileutil.h"
#include "string_utils.h"
//#include "truth.h"
#include "errors.h"
#include "console.h"

#ifdef UNICODE_SUPPORTED

////////////////////////////////////////////////////////////////////////////////

// Path of the configuration file used by the (currently commented-out)
// save / restore / delete code further down in this file.
#define CHARCONV_CFG_FILE               "\\SYS\\CHARCONV.CFG"
// Section name passed to the (currently commented-out) profile-string calls
// that read and write the UTF-8 option.
#define UTF8_CFG_SECTION_NAME           "utf8"

////////////////////////////////////////////////////////////////////////////////

// Module state for the UTF-8 conversion setting.
typedef struct
{
  UTF8OPT convert;                      // option handed back by UTF8GetOption()
} UTF8INFO;

////////////////////////////////////////////////////////////////////////////////

// The single module-wide instance of the UTF-8 setting.  It has no explicit
// initializer, so it starts out zero-filled.
// NOTE(review): no active code visible in this file ever assigns UTF8.convert
// (the restore path is commented out) - confirm the zero value is a valid
// UTF8OPT.
UTF8INFO UTF8;

// Name <-> value table for the UTF-8 conversion options; it is the table
// UTF8Cmd() passes to NumberGetString() and StringGetNumber().
// Unicode support is crippled until it is fully implemented, so "UNICODE"
// is deliberately left out and only "ASCII" can be selected.

STRNUMPAIR UTF8Options[] =
{
    {"ASCII", UTF8_TO_ASCII},
    //{"UNICODE", UTF8_TO_UNICODE},
    {0, 0}                              // terminate the list
};

// Look-up table that maps every possible byte value (0-255, read as an
// ISO 8859-1 code) to a printable 7-bit ASCII character.  Indexed by the
// unsigned byte value; used by AsciiToIso8859().
//
//   0-31, 127-177 : replaced by a space
//   32-126        : map to themselves
//   178, 179, 185 : superscript digits, replaced by '2', '3', '1'
//   183           : replaced by '.'
//   192-255       : mostly replaced by an ASCII letter (for the accented
//                   letters, the unaccented base letter); 215 becomes 'x';
//                   198, 230 and 247 become a space
//   all others    : replaced by a space
//
// The "// n, n+1, ..." comment above each row gives the indices of the
// entries in that row.
const char Iso8859_1Conv[256] =   {
  //  0,   1,   2,   3,   4,   5,   6,   7,  8,   9,
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',' ', ' ',
  // 10,  11,  12,  13,  14,  15,  16,  17, 18,  19,
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
  // 20,  21,  22,  23,  24,  25,  26,  27, 28,  29,
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
  // 30,  31,  32,  33,  34,  35,  36,  37, 38,  39,
    ' ', ' ', ' ', '!','\"',  '#', '$', '%','&', '\'',
  // 40,  41,  42,  43,  44,  45,  46,  47, 48,  49,
    '(', ')', '*', '+', ',', '-', '.', '/', '0', '1',
  // 50,  51,  52,  53,  54,  55,  56,  57, 58,  59,
    '2', '3', '4', '5', '6', '7', '8', '9',':', ';',
  // 60,  61,  62,  63,  64,  65,  66,  67, 68,  69,
    '<', '=', '>', '?', '@', 'A', 'B', 'C', 'D', 'E',
  // 70,  71,  72,  73,  74,  75,  76,  77, 78,  79,
    'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
  // 80,  81,  82,  83,  84,  85,  86,  87, 88,  89,
    'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
  // 90,  91,  92,  93,  94,  95,  96,  97, 98,  99,
    'Z', '[', '\\',']', '^', '_', '`', 'a', 'b','c',
  //100, 101, 102, 103, 104, 105, 106, 107,108, 109,
    'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k','l', 'm',
  //110, 111, 112, 113, 114, 115, 116, 117,118, 119,
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v','w',
  //120, 121, 122, 123, 124, 125, 126, 127,128, 129,
    'x', 'y', 'z', '{', '|', '}', '~', ' ', ' ',' ',
  //130, 131, 132, 133, 134, 135, 136, 137,138, 139,
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',' ', ' ',
  //140, 141, 142, 143, 144, 145, 146, 147,148, 149,
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',' ',
  //150, 151, 152, 153, 154, 155, 156, 157,158, 159,
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',' ', ' ',
  //160, 161, 162, 163, 164, 165, 166, 167,168, 169,
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',' ',
  //170, 171, 172, 173, 174, 175, 176, 177,178, 179,
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ','2', '3',
  //180, 181, 182, 183, 184, 185, 186, 187,188, 189,
    ' ', ' ', ' ', '.', ' ', '1', ' ', ' ', ' ',' ',
  //190, 191, 192, 193, 194, 195, 196, 197,198, 199,
    ' ', ' ', 'A', 'A', 'A', 'A', 'A', 'A',' ', 'C',
  //200, 201, 202, 203, 204, 205, 206, 207,208, 209,
    'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I','D', 'N',
  //210, 211, 212, 213, 214, 215, 216, 217,218, 219,
    'O', 'O', 'O', 'O', 'O', 'x', 'O', 'U', 'U', 'U',
  //220, 221, 222, 223, 224, 225, 226, 227,228, 229,
    'U', 'Y', 'P', 'B', 'a', 'a', 'a', 'a', 'a', 'a',
  //230, 231, 232, 233, 234, 235, 236, 237,238, 239,
    ' ', 'c', 'e', 'e', 'e', 'e', 'i', 'i','i', 'i',
  //240, 241, 242, 243, 244, 245, 246, 247,248, 249,
    'o', 'n', 'o', 'o', 'o', 'o', 'o', ' ','o','u',
  //250, 251, 252, 253, 254, 255
    'u', 'u', 'u', 'y', 'p', 'y'
};

////////////////////////////////////////////////////////////////////////////////

/**
 * \author      Pete McCormick
 *
 * \date        02/21/06
 *
 * \return      void
 *
 * \brief       Takes a UTF-8 string and converts it in-place to an ASCII string.
 *
 * \param       p        NUL-terminated UTF-8 string; overwritten with the
 *                       converted, NUL-terminated result
 * \param       destLen  maximum number of source bytes to interpret
 *
 * \note        Current strategy is to convert only certain sequences and
 *              replace the others with spaces.  The output never grows:
 *              every sequence consumes at least one byte and produces
 *              exactly one byte.
 *
 * \note        Two-byte sequences whose lead byte is <= 0xc3 are collapsed
 *              into the single byte they encode (values up to 0xff), so the
 *              result may still contain bytes >= 0x80.
 *
 * \note        A multi-byte sequence that is cut short by the terminating
 *              NUL or by destLen is replaced by a single space; bytes beyond
 *              the terminator / limit are never read.
 *
 * Special Conversions:
 *
 * 0xe2 0x80 0x98   ==> '
 * 0xe2 0x80 0x99   ==> '
 * 0xc2 0xb4        ==> '
 * 0xe2 0x80 0x9c   ==> "
 * 0xe2 0x80 0x9d   ==> "
 *
 */
void UTF8ToAscii(char * p, UINT32 destLen)
{
    unsigned char * pSrc = (unsigned char *)p;
    unsigned char * pDest = pSrc;
    UINT32 used;            // source bytes consumed so far
    UINT32 seqLen;          // length announced by the lead byte
    UINT32 have;            // bytes of the sequence actually present

    for(;;)
    {
        used = (UINT32)(pSrc - (unsigned char *)p);
        if((used >= destLen) || (*pSrc == 0))
        {
            break;
        }

        // classify the lead byte
        if((*pSrc & 0xe0) == 0xc0)
        {
            seqLen = 2;
        }
        else if((*pSrc & 0xf0) == 0xe0)
        {
            seqLen = 3;
        }
        else if((*pSrc & 0xf8) == 0xf0)
        {
            seqLen = 4;
        }
        else
        {
            seqLen = 1;
        }

        if(seqLen == 1)
        {
            // plain ASCII, or a byte that is not a recognised lead byte:
            // copied through unchanged
            *pDest++ = *pSrc++;
            continue;
        }

        // Count how many bytes of the sequence exist before the terminator
        // and before the length limit, so the pointer is never advanced
        // past either of them.
        have = 1;
        while((have < seqLen) && (have < (destLen - used)) && (pSrc[have] != 0))
        {
            have++;
        }

        if(have < seqLen)
        {
            // truncated sequence: one space stands in for what is there
            *pDest++ = ' ';
            pSrc += have;
            continue;
        }

        if(seqLen == 2)
        {
            if((pSrc[0] == 0xc2) && (pSrc[1] == 0xb4))
            {
                // acute accent used as an apostrophe
                *pDest++ = '\'';
            }
            else if((pSrc[0] <= 0xc3) && (pSrc[1] <= 0xbf))
            {
                // allow values up to 0xff to remain as 1 character
                // NOTE(review): lead bytes 0xc0/0xc1 (overlong forms) also
                // take this path and can yield values below 0x80.
                *pDest++ = (unsigned char)(((pSrc[0] & 0x03) << 6) | (pSrc[1] & 0x3f));
            }
            else
            {
                *pDest++ = ' ';
            }
        }
        else if(seqLen == 3)
        {
            if((pSrc[0] == 0xe2) && (pSrc[1] == 0x80) &&
               ((pSrc[2] == 0x98) || (pSrc[2] == 0x99)))
            {
                // left / right single quotation mark
                *pDest++ = '\'';
            }
            else if((pSrc[0] == 0xe2) && (pSrc[1] == 0x80) &&
                    ((pSrc[2] == 0x9c) || (pSrc[2] == 0x9d)))
            {
                // left / right double quotation mark
                *pDest++ = '\"';
            }
            else
            {
                // every other 3-byte sequence, including the remaining
                // 0xe2-prefixed ones, becomes a space
                *pDest++ = ' ';
            }
        }
        else
        {
            // 4-byte encoding
            *pDest++ = ' ';
        }

        pSrc += seqLen;
    }

    *pDest = 0;
}

/**
 * \author      Pete McCormick
 *
 * \date        02/21/06
 *
 * \return      void
 *
 * \brief       Takes a UTF-8 string and converts it to a UNICODE string
 *              of 16-bit units, terminated by a 0 unit.
 *
 * \param       pDest   receives the converted units plus the terminating 0
 * \param       p       NUL-terminated UTF-8 source string
 * \param       srcLen  number of bytes of source to interpret
 *
 * \note        At most one unit is written per source byte consumed, plus
 *              the terminator.
 *
 * \note        Complete 4-byte sequences are skipped without producing
 *              output (they do not fit in one 16-bit unit).
 *
 * \note        A multi-byte sequence cut short by the terminating NUL or by
 *              srcLen produces a single U+FFFD (replacement character);
 *              bytes beyond the terminator / limit are never read.
 *
 */
void UTF8ToUnicode(UINT16 * pDest, char * p, UINT32 srcLen)
{
    unsigned char * pSrc = (unsigned char *)p;
    UINT32 used;            // source bytes consumed so far
    UINT32 seqLen;          // length announced by the lead byte
    UINT32 have;            // bytes of the sequence actually present

    for(;;)
    {
        used = (UINT32)(pSrc - (unsigned char *)p);
        if((used >= srcLen) || (*pSrc == 0))
        {
            break;
        }

        // classify the lead byte
        if((*pSrc & 0xe0) == 0xc0)
        {
            seqLen = 2;
        }
        else if((*pSrc & 0xf0) == 0xe0)
        {
            seqLen = 3;
        }
        else if((*pSrc & 0xf8) == 0xf0)
        {
            seqLen = 4;
        }
        else
        {
            seqLen = 1;
        }

        if(seqLen == 1)
        {
            // no encoding, or a byte that is not a recognised lead byte:
            // widened and copied through
            *pDest++ = *pSrc++;
            continue;
        }

        // Count how many bytes of the sequence exist before the terminator
        // and before the length limit, so the pointer is never advanced
        // past either of them.
        have = 1;
        while((have < seqLen) && (have < (srcLen - used)) && (pSrc[have] != 0))
        {
            have++;
        }

        if(have < seqLen)
        {
            // truncated sequence: emit the replacement character
            *pDest++ = 0xfffd;
            pSrc += have;
            continue;
        }

        if(seqLen == 2)
        {
            // 110zzzzx 10xxxxxx ==>
            // zzz zxxxxxxx
            *pDest++ = (UINT16)(((pSrc[0] & 0x1f) << 6) | (pSrc[1] & 0x3f));
        }
        else if(seqLen == 3)
        {
            // 1110zzzz 10zxxxxx 10xxxxxx ==>
            // zzzzzxxx xxxxxxxx
            *pDest++ = (UINT16)(((pSrc[0] & 0x0f) << 12) |
                                ((pSrc[1] & 0x3f) << 6) |
                                (pSrc[2] & 0x3f));
        }
        // else: 4-byte encoding - ignored, nothing is written

        pSrc += seqLen;
    }

    *pDest = 0;
}

/**
 * \author      Pete McCormick
 *
 * \date        02/21/06
 *
 * \return      void
 *
 * \retval      void
 *
 * \brief       Intended to save the specified UTF8 conversion option to
 *              permanent storage.
 *
 * \param       option  the option to save (currently unused)
 *
 * \note        The whole implementation is commented out, so this function
 *              currently does nothing: the option is neither written to the
 *              configuration file nor stored in memory.
 *
 */
void UTF8SaveOption(UTF8OPT option)
{
// Disabled implementation: looked up the option's name and wrote it to the
// configuration file.
//    char * section = UTF8_CFG_SECTION_NAME;
//    char optionTxt[32];
//
//    if(NumberGetString(UTF8Options, option, optionTxt, sizeof(optionTxt)) != 0)
//    {
//        DmErrorPrintf("Invalid UTF8 option %u!", option);
//        return;
//    }
//
//    if(WritePrivateProfileString(section, "UTF8", optionTxt, CHARCONV_CFG_FILE) == FALSE)
//    {
//        DmErrorPrintf("Could not save UTF8 option!");
//    }
}

/**
 * \author      Pete McCormick
 *
 * \date        02/21/06
 *
 * \return      UTF8OPT
 *
 * \retval      Current UTF8 conversion option
 *
 * \brief       Returns the current UTF8 conversion option, i.e. the value
 *              held in the module-level UTF8.convert field.
 *
 * \param       void
 *
 * \note        NOTE(review): no active code visible in this file assigns
 *              UTF8.convert, so this appears to return the zero-initialized
 *              value - confirm that is the intended option.
 *
 */
UTF8OPT UTF8GetOption(void)
{
    return UTF8.convert;
}

/**
 * \author      Pete McCormick
 *
 * \date        02/21/06
 *
 * \return      int
 *
 * \retval      0: success
 * \retval      <0: failure
 *
 * \brief       Intended to restore the UTF8 conversion option from
 *              permanent storage.
 *
 * \param       void
 *
 * \note        The implementation is commented out: nothing is read and
 *              UTF8.convert is left untouched.  The function currently
 *              always returns 0.
 *
 */
int CharConvRestoreConfig(void)
{
// Disabled implementation: read the option name from the configuration file
// (defaulting to "ASCII") and fell back to UTF8_TO_ASCII for unknown names.
//    char temp[64];
//    char * section_name = UTF8_CFG_SECTION_NAME;
//    int param;
//
//    GetPrivateProfileString(section_name, "UTF8", "ASCII", temp, sizeof(temp), CHARCONV_CFG_FILE);
//    if(StringGetNumber(UTF8Options, temp, (UINT32 *)&UTF8.convert) != 0)
//    {
//        UTF8.convert = UTF8_TO_ASCII;
//    }
//
    return 0;
}

/**
 * \author      Pete McCormick
 *
 * \date        02/21/06
 *
 * \return      void
 *
 * \retval      void
 *
 * \brief       Intended to reset the option to factory-default settings.
 *
 * \param       void
 *
 * \note        The only statement (deleting the configuration file) is
 *              commented out, so this function currently does nothing.
 *
 */
void CharConvInitDefaults(void)
{
    //DeleteFile(CHARCONV_CFG_FILE);
}

/**
 * \author      Pete McCormick
 *
 * \date        02/21/06
 *
 * \return      void
 *
 * \retval      void
 *
 * \brief       Console command to query and set the UTF conversion option
 *
 * \param       void
 *
 * \note
 *
 */
int UTF8Cmd(int ignore, char * cmd)
{
    UTF8OPT option;
    char optionTxt[32];

    if (*cmd == '?')
    {
        DmConsolePrintf("UTF8 [ASCII]\r");
        DmConsolePrintf("\tASCII - convert outbound UTF-8 strings to ASCII\r");
        //DmConsolePrintf("\tUNICODE - convert outbound UTF-8 strings to Unicode\r");
        return 0;
    }

    if(*cmd == 0)
    {
        option = UTF8GetOption();

        if(NumberGetString(UTF8Options, option, optionTxt, sizeof(optionTxt)) != 0)
        {
            DmConsolePrintf("Invalid UTF-8 option %u!\r", option);
            return -1;
        }
        DmConsolePrintf("UTF-8 option: %s\r", optionTxt);
        return 0;
    }

    LocalConvertEntireStringToUpper(cmd);

    if(StringGetNumber(UTF8Options, cmd, (UINT32*)&option, 0) != 0)
    {
        DmConsolePrintf("Unrecognized UTF-8 option %s!\r", cmd);
        return -1;
    }

    UTF8SaveOption(option);
    DmConsolePrintf("UTF-8 option set to %s.\r", cmd);
    return 0;
}

/**
 * \author      Pete McCormick
 *
 * \date        01/31/2006
 *
 * \return      void
 *
 * \retval      void
 *
 * \brief       Replaces every byte of a NUL-terminated string with the
 *              printable character that the Iso8859_1Conv table assigns
 *              to that byte value.
 *
 * \param       p   string to convert; modified in place
 *
 * \warning     Conversion overwrites source string.
 */
void AsciiToIso8859(char * p)
{
    unsigned char * cursor;

    // walk the string as unsigned bytes so each one is a valid 0..255 index
    for(cursor = (unsigned char *)p; *cursor != 0; cursor++)
    {
        *cursor = (unsigned char)Iso8859_1Conv[*cursor];
    }
}
#endif